home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Languguage OS 2
/
Languguage OS II Version 10-94 (Knowledge Media)(1994).ISO
/
gnu
/
libg_261.zip
/
libg_261
/
libg++
/
etc
/
trie-gen
/
compact.cc
< prev
next >
Wrap
C/C++ Source or Header
|
1994-02-06
|
13KB
|
402 lines
/* Compact a sparse 2-D matrix. Uses the Tarjan and Yao algorithm
taken from the article ``Storing a Sparse Table'' in CACM, 1979.
Copyright (C) 1989 Free Software Foundation, Inc.
written by Douglas C. Schmidt (schmidt@ics.uci.edu)
This file is part of GNU TRIE-GEN.
GNU TRIE-GEN is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 1, or (at your option)
any later version.
GNU TRIE-GEN is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU trie-gen; see the file COPYING. If not, write to
the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
#include <stdio.h>
#include <builtin.h>
#include <limits.h>
#include <new.h>
#include <std.h>
#include <assert.h>
#include "renew.h"
#include "compact.h"
/* Essentially provides the functionality of `calloc.' */
static inline void *
operator new (size_t elem_size, int size)
{
void *temp = new char[elem_size * size];
memset (temp, 0, elem_size * size);
return temp;
}
/* Essentially combines the functionality of `realloc' and `calloc'. */
static inline void *
operator new (size_t elem_size, void *old_ptr, int old_size, int new_size)
{
old_ptr = new (old_ptr, new_size * elem_size) char;
memset ((char*)old_ptr + old_size * elem_size, 0,
(new_size - old_size) * elem_size);
return old_ptr;
}
/* Initializes the internal form in the case that the user passes
in a pointer to an already existing 2-dimensional matrix. Note
that by declaring the matrix to be a 1-dimensional array we
can perform the col and row offset calculations ourselves and
handle matrices with fixed, but arbitrary-sized columns and rows. */
Compact_Matrix::Compact_Matrix (ITEM_TYPE *mat, int rows, int cols):
matrix (mat), max_rows (rows), total_cols (cols), current_rows (rows)
{
init (rows);
for (int i = 0; i < max_rows; i++)
for (int j = 0; j < total_cols; j++)
if (matrix[i * total_cols + j] != 0)
{
ITEM_TYPE value = matrix[i * total_cols + j];
total_entries++;
row_vec[i].count++;
row_vec[i].col_list = new Column_Node (row_vec[i].col_list, j, value);
}
}
/* Initializer for the case where we don't have a previously created
matrix to play with. DEFAULT_ROWS represents the best first
approximation as to the number of rows in the matrix. However, this
buffer is resized as needed, so the algorithm isn't overly penalized
for a bad first guess. */
Compact_Matrix::Compact_Matrix (int default_rows): max_rows (default_rows)
{
current_rows = 0;
total_cols = 0;
matrix = 0;
init (default_rows);
}
void Compact_Matrix::init(int rows)
{
max_col_count = 0;
total_entries = 0;
compressed_len = -1;
row_offsets = 0;
checks = 0;
bucket_vec = 0;
values = 0;
row_vec = rows ? new Row_Node[rows] : 0;
}
/* Returns the `matrix[i][j]' item in the sparse 2-dimensional matrix.
Note that if the matrix is very sparse the number of items in each
COL_LIST will be very short, hence linear search is not too inefficent. */
ITEM_TYPE
Compact_Matrix::operator () (int i, int j)
{
assert (i >= 0 && i < current_rows && j >= 0);
for (Column_Node *col_list = row_vec[i].col_list; col_list; col_list = col_list->next)
if (col_list->index == j)
return col_list->value;
return 0;
}
/* Sets `matrix[i][j]' to VALUE. ROW_VEC is dynamically resized, if
necessary. At the moment the new entry is simply pushed onto the
linked list of COL_LIST entries. If there aren't many elements then
this will not be too inefficient for later retrieval. */
void
Compact_Matrix::operator () (int i, int j, ITEM_TYPE value)
{
if (i >= max_rows)
resize (((max_rows * 2) > (i + 1)) ? (max_rows * 2) : (i + 1));
row_vec[i].col_list = new Column_Node (row_vec[i].col_list, j, value);
total_entries++;
row_vec[i].count++;
if (current_rows < (i + 1))
current_rows = i + 1;
}
/* Enlarges the ROW_VEC from CURR_ROWS to NEW_SIZE. */
void
Compact_Matrix::resize (int new_size)
{
Row_Node *temp = new Row_Node[max_rows = new_size];
memcpy ((void *) temp, (void *) row_vec, current_rows * sizeof *row_vec);
delete row_vec;
row_vec = temp;
}
/* Calls the functions that compact the table and generate the resulting
lookup scheme. */
void
Compact_Matrix::output (void)
{
bucket_sort ();
first_fit_decreasing ();
output_arrays ();
output_lookup ();
}
/* Performs a bucket sort so that all rows with the same number of non-null
entries are treated as part of the same equivalence class. This
operation is very fast! */
void
Compact_Matrix::bucket_sort (void)
{
for (int i = 0; i < current_rows; i++)
{
if (max_col_count < row_vec[i].count)
max_col_count = row_vec[i].count;
}
bucket_vec = new (max_col_count + 1) Column_Node *;
for (i = 0; i < current_rows; i++)
bucket_vec[row_vec[i].count] = new Column_Node (bucket_vec[row_vec[i].count], i, 0);
}
/* Useful macros to clarify subsequent code. They should probably be made
into member functions... */
#define STARTING_ROW_OFFSET(X) (row_offsets[(X)])
#define LARGEST_COL_VALUE(X) (row_vec[(X)].col_list->index)
#define COL_LIST(X) (row_vec[(X)].col_list)
#define COL_COUNT(X) (row_vec[(X)].count)
#define COL_INDEX(X) ((X)->index)
#define ROW_INDEX(X) ((X)->index)
const int MAX_ASCII_RANGE = 128;
/* Performs sparse 2-dimensional array compaction suitable for use with the
`double-offset indexing' (used by Bison and FLEX to compact the size of
the sparse LR parsing tables and DFA's). This function implements the
`first fit decreasing' heuristic described in Tarjan and Yao's paper
``Storing a Sparse Table'' in CACM, 1979. */
void
Compact_Matrix::first_fit_decreasing (void)
{
/* Bit-vector and counter that records if a row/col location is already set. */
int current_max = current_rows
+ ((total_cols > MAX_ASCII_RANGE) ? total_cols : MAX_ASCII_RANGE);
char *already_assigned = (char *) malloc (current_max);
memset (already_assigned, 0, current_max);
row_offsets = new (current_rows) int;
values = new (current_max) ITEM_TYPE;
checks = new (current_max) int;
for (int row_index = max_col_count; row_index >= 0; row_index--)
if (bucket_vec[row_index])
/* Process every row in the `equivalence class' of rows containing the
same number of non-null column entries. */
for (Column_Node *rows_list = bucket_vec[row_index]; rows_list; rows_list = rows_list->next)
{
int row = ROW_INDEX (rows_list);
int row_offset = STARTING_ROW_OFFSET (row);
/* Process every column index in the current row. */
for (Column_Node *col_list = COL_LIST (row); ;)
{
int col = COL_INDEX (col_list);
/* If we exceed the boundaries it's time to resize various buffers. */
if (row_offset + col >= current_max)
{
int new_size = ((current_max > (row_offset + col)) ? current_max : (row_offset + col)) * 2;
already_assigned =
(char *) realloc (already_assigned, new_size);
memset (already_assigned + current_max, 0,
new_size - current_max);
values = new (values, current_max, new_size) int;
checks = new (checks, current_max, new_size) int;
current_max *= 2;
}
if (already_assigned[row_offset + col])
{
/* Efficiency hack to skip over obvious collisions. */
while (++row_offset + col < current_max && already_assigned[row_offset + col])
;
/* Reset col_list and begin again (with new row offset). */
col_list = COL_LIST (row);
}
else if ((col_list = col_list->next) == 0)
break;
}
/* No more collisions exist. Record the positions for the next round. */
for (col_list = COL_LIST (row); col_list; col_list = col_list->next)
{
int offset = row_offset + COL_INDEX (col_list);
already_assigned[offset] = 1;
if (compressed_len < offset)
compressed_len = offset;
values[offset] = col_list->value;
checks[offset] = row;
}
/* Need to reset this once all is said and done! */
STARTING_ROW_OFFSET (row) = row_offset;
}
free(already_assigned);
}
/* Generates the three arrays that comprise the `double-offset index'
scheme. This is a bit messy, since I'm trying to neatly format
the generated tables and also determine the smallest (in bytes)
type declarations necessary to represent the elements of the tables. */
void
Compact_Matrix::output_arrays ()
{
const int COL_WIDTH = 12;
int max_number = 0;
int count;
printf ("#if !defined(__STDC__) && !defined(__cplusplus)\n");
printf ("#define const\n");
printf ("#define signed\n");
printf ("#endif\n\n");
printf ("#define YY_LAST %d\n", compressed_len);
for (int i = 0; i < current_rows; i++)
{
if (max_number < row_offsets[i])
max_number = row_offsets[i];
}
count = max_number;
for (int field_width = 1; (count /= 10) > 0; field_width++)
;
printf ("\nstatic const unsigned %s yy_rows[%d] = \n{\n ",
max_number < UCHAR_MAX ? "char" : max_number < USHRT_MAX ? "short" : "int",
current_rows);
for (i = 0; i < current_rows; i++)
printf ("%*d,%s", field_width, row_offsets[i], (i + 1) % COL_WIDTH ? " " : "\n ");
max_number = 0;
for (i = 0; i < compressed_len + 1; i++)
{
if (max_number < checks[i])
max_number = checks[i];
}
count = max_number;
for (field_width = 1; (count /= 10) > 0; field_width++)
;
printf ("\n};\n\nstatic const unsigned %s yy_check[%d] = \n{\n ",
max_number < UCHAR_MAX ? "char" : max_number < USHRT_MAX ? "short" : "int",
compressed_len + 1);
for (i = 0; i < compressed_len + 1; i++)
printf ("%*d,%s", field_width, checks[i], (i + 1) % COL_WIDTH ? " " : "\n ");
max_number = 0;
for (i = 0; i < compressed_len + 1; i++)
#ifdef __GNUC__
max_number >?= abs (values[i]);
#else
{
int tmp = abs (values[i]);
if (max_number < tmp)
max_number = tmp;
}
#endif
count = max_number;
for (field_width = 2; (count /= 10) > 0; field_width++)
;
printf ("\n};\n\nstatic const %s yy_next[%d] = \n{\n ",
max_number < SCHAR_MAX ? (CHAR_MIN == 0 ? "signed char" : "char")
: max_number < SHRT_MAX ? "short" : "int",
compressed_len + 1);
for (i = 0; i <= compressed_len; i++)
printf ("%*d,%s", field_width, values[i], (i + 1) % COL_WIDTH ? " " : "\n ");
printf ("\n};\n\n");
}
/* Generates the `double-offset index' function, that provides the
value stored in the location referenced by parameters ROW and COL. */
void
Compact_Matrix::output_lookup (void)
{
printf ("static inline int\nnext_state "
"(int row, int col)\n{\n int state_index = yy_rows[row] + col;\n\n"
" if (state_index > YY_LAST || yy_check[state_index] != row)\n "
"return 0;\n else\n return yy_next[state_index];\n}\n");
}
/* Useful debugging routine. */
void
Compact_Matrix::dump_entries (void)
{
for (int i = 0; i < current_rows; i++)
if (row_vec[i].col_list)
{
Column_Node *temp = row_vec[i].col_list;
fprintf (stderr, "row %d's count = %d, cols = ", i, row_vec[i].count);
do { fprintf (stderr, "%d ", temp->index); } while (temp = temp->next);
putc ('\n', stderr);
}
}
/* Useful debugging routine. */
void
Compact_Matrix::dump_bucket (void)
{
for (int i = 0; i < total_entries; i++)
if (bucket_vec[i])
{
Column_Node *temp = bucket_vec[i];
fprintf (stderr, "bucket %d = ", i);
do { fprintf (stderr, "%d ", temp->index); } while (temp = temp->next);
putc ('\n', stderr);
}
}